package com.wechat.back.utils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * Removes stop words from text by filtering it one character (Unicode code point) at a time.
 *
 * <p>Matching is strictly per character: each code point of the input is looked up in the
 * stop-word set as a one-character string. Consequently an entry is removed wherever that
 * character occurs, regardless of the surrounding word, and entries longer than one
 * character can never match.
 */
public class ChineseStopWordRemover {
    /** Built-in single-character stop words; always part of every instance's set. */
    private static final Set<String> DEFAULT_STOP_WORDS = new HashSet<>(Arrays.asList(
            "的", "是", "了", "和", "与", "也", "在", "有", "为", "以", "于", "上", "下", "来", "去", "到", "从","试","测","喜",
            "欢","览","游","爱","坐","躺"
    ));

    /** Effective stop words for this instance (defaults plus any caller-supplied entries). */
    private final Set<String> stopWords;

    /** Creates a remover that uses only the built-in default stop words. */
    public ChineseStopWordRemover() {
        this.stopWords = new HashSet<>(DEFAULT_STOP_WORDS);
    }

    /**
     * Creates a remover that uses the given stop words in addition to the built-in defaults.
     *
     * <p>The supplied set is copied, so later changes to it do not affect this instance.
     *
     * @param stopWords extra stop words to remove; {@code null} is treated as "no extras"
     */
    public ChineseStopWordRemover(Set<String> stopWords) {
        this.stopWords = new HashSet<>(DEFAULT_STOP_WORDS);
        if (stopWords != null) {
            this.stopWords.addAll(stopWords);
        }
    }

    /**
     * Returns {@code text} with every character that is a stop word removed.
     *
     * @param text the text to filter; may be {@code null}
     * @return the filtered text, or an empty string when {@code text} is {@code null}
     */
    public String removeStopWords(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder result = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            // Step by code point rather than by char so that a character encoded as a
            // surrogate pair is looked up (and kept or dropped) as one unit.
            int width = Character.charCount(text.codePointAt(index));
            String symbol = text.substring(index, index + width);
            if (!stopWords.contains(symbol)) {
                result.append(symbol);
            }
            index += width;
        }
        return result.toString();
    }
}